

# Peptidoform level analysis #


source('D:/Pipeline comparisons/Writing/R Functions/Binomial Scores Function Peptidoform level.R')
source('D:/Pipeline comparisons/Writing/R Functions/FLR function Bin Adjusted.R')
source('D:/Pipeline comparisons/Writing/R Functions/Function frequency of site.R')


# We want to assess if there are any differences between the Max and MM collapsing methods using all rice data sets #

# NOTE: leftover console output from the `conflicted` package; the intent is that dplyr::filter is preferred over same-named functions from other packages.
suppressPackageStartupMessages(library("tidyverse"))

library(dplyr)
library(stringr)
library(useful)
library(MASS)
library(reshape2)
library(epiDisplay)

# First we calculate all binomial adjusted data #
#################################################

# Load the TPP PSM-site exports for every rice dataset. Each dataset has two
# exports: "TPP_<id>_A_PSMSITE.csv" (stored as <id>A, input to the pASTY
# analysis below) and "TPP_<id>_PSMSITE.csv" (stored as <id>, input to the
# pSTY analysis). Every table is tagged with its accession in `dataset`.
rice_ids <- c(
  "PXD000923", "PXD002222", "PXD002756", "PXD004705",
  "PXD004939", "PXD005241", "PXD012764", "PXD019291"
)
rice_data_dir <- "D:/Pipeline comparisons/Writing/Data/TPP/Rice"

# Read one PSM-site export and tag it with its dataset accession.
#   id        dataset accession, e.g. "PXD000923"
#   suffix    "" for the plain export, "_A" for the "_A" export
#   data_dir  directory holding the TPP_*_PSMSITE.csv files
# Returns the data frame from read.csv() with an added `dataset` column.
# Stops with an explicit message when the file is missing.
read_rice_psmsite <- function(id, suffix = "", data_dir = rice_data_dir) {
  csv_path <- file.path(data_dir, paste0("TPP_", id, suffix, "_PSMSITE.csv"))
  if (!file.exists(csv_path)) {
    stop("PSM-site export not found: ", csv_path, call. = FALSE)
  }
  psm <- read.csv(file = csv_path)
  # rep() keeps the assignment valid even when an export has zero rows.
  psm$dataset <- rep(id, nrow(psm))
  psm
}

rice_pASTY_raw <- lapply(
  setNames(rice_ids, paste0(rice_ids, "A")),
  read_rice_psmsite,
  suffix = "_A"
)
rice_pSTY_raw <- lapply(setNames(rice_ids, rice_ids), read_rice_psmsite)

# Publish every table under its original variable name
# (PXD000923A, ..., PXD019291A and PXD000923, ..., PXD019291).
invisible(list2env(c(rice_pASTY_raw, rice_pSTY_raw), envir = environment()))

# pSTY peptidoform level #
##########################

rice_ids <- c(
  "PXD000923", "PXD002222", "PXD002756", "PXD004705",
  "PXD004939", "PXD005241", "PXD012764", "PXD019291"
)

# Add the derived columns used by the pSTY analysis to one PSM-site table:
#   Peptidoform     <Peptide_mod>_<PTM.positions>
#   PTM_final_prob  Score * PTM.Score
#   PROTEIN_LOC     <Protein>_<Protein.position>
# Returns the table with the three columns added.
add_pform_columns <- function(psm) {
  psm$Peptidoform <- paste0(psm$Peptide_mod, "_", psm$PTM.positions)
  psm$PTM_final_prob <- psm$Score * psm$PTM.Score
  psm$PROTEIN_LOC <- paste0(psm$Protein, "_", psm$Protein.position)
  psm
}

# Data collapsed by taking the Max by peptidoform #
# Keeps, per Peptidoform, the row(s) with the highest PTM_final_prob
# (top_n() keeps ties). The result is ungrouped so the grouping does not
# leak into later subsetting / bind_rows() steps.
collapse_max_pform <- function(psm) {
  psm %>%
    dplyr::group_by(Peptidoform) %>%
    dplyr::top_n(1, PTM_final_prob) %>%
    dplyr::ungroup()
}

rice_pSTY <- lapply(mget(rice_ids, envir = environment()), add_pform_columns)
rice_pSTY_pform <- lapply(rice_pSTY, collapse_max_pform)
names(rice_pSTY_pform) <- paste0(rice_ids, "_pform")

# Publish the annotated tables (PXD000923, ...) and the collapsed tables
# (PXD000923_pform, ...) under their original variable names.
invisible(list2env(c(rice_pSTY, rice_pSTY_pform), envir = environment()))


# pASTY peptidoform level #
##########################


rice_ids <- c(
  "PXD000923", "PXD002222", "PXD002756", "PXD004705",
  "PXD004939", "PXD005241", "PXD012764", "PXD019291"
)
rice_pASTY_raw <- mget(paste0(rice_ids, "A"), envir = environment())

# plyr is attached only while binAdjustPform() runs (presumably the sourced
# function relies on plyr verbs -- confirm in its source file). plyr masks
# several dplyr functions, so it is detached again in `finally`, which also
# runs when one of the calls fails.
library(plyr)
rice_pASTY_pform <- tryCatch(
  lapply(rice_pASTY_raw, binAdjustPform),
  finally = detach("package:plyr", character.only = TRUE)
)

# FLR_AdjTPP() is applied after plyr has been detached, as before.
rice_pASTY_pform <- lapply(rice_pASTY_pform, FLR_AdjTPP)

# Intermediate columns that are dropped from the exported "pformc" tables.
pform_helper_cols <- c(
  "PROTEIN_POS", "PRO_pos_list", "PTM_length", "PTM_beg2",
  "PTM_end2", "PTM_End", "PTM_Beginning"
)
rice_pASTY_pformc <- lapply(rice_pASTY_pform, function(scored) {
  scored[setdiff(names(scored), pform_helper_cols)]
})

names(rice_pASTY_pform) <- paste0(rice_ids, "A_pform")
names(rice_pASTY_pformc) <- paste0(rice_ids, "A_pformc")

# Publish PXD000923A_pform, ... (all columns) and PXD000923A_pformc, ...
# (helper columns removed) under their original variable names.
invisible(
  list2env(c(rice_pASTY_pform, rice_pASTY_pformc), envir = environment())
)


# Write the peptidoform-level tables to CSV: the pSTY tables (<id>_pform)
# go to "<id>_pform.csv" and the pASTY tables without helper columns
# (<id>A_pformc) go to "<id>A_pform.csv".
rice_ids <- c(
  "PXD000923", "PXD002222", "PXD002756", "PXD004705",
  "PXD004939", "PXD005241", "PXD012764", "PXD019291"
)
pform_out_dir <- "D:/Pipeline comparisons/Writing/Data/TPP/Rice/TPP_pform"

# write.csv() cannot create missing directories, so make sure it exists.
if (!dir.exists(pform_out_dir)) {
  dir.create(pform_out_dir, recursive = TRUE)
}

# Names are the objects to export, values are the target file names.
pform_exports <- c(
  setNames(paste0(rice_ids, "_pform.csv"), paste0(rice_ids, "_pform")),
  setNames(paste0(rice_ids, "A_pform.csv"), paste0(rice_ids, "A_pformc"))
)

for (object_name in names(pform_exports)) {
  write.csv(
    get(object_name, envir = environment()),
    file.path(pform_out_dir, pform_exports[[object_name]])
  )
}

#######################################################################################################################################

# Calculating FLR data at peptidoform level #

#######################################################################################################################################



# Pool the pASTY peptidoform tables of all eight rice datasets.
pASTY_tables <- list(
  PXD000923A_pformc, PXD002222A_pformc, PXD002756A_pformc, PXD004705A_pformc,
  PXD004939A_pformc, PXD005241A_pformc, PXD012764A_pformc, PXD019291A_pformc
)
AllRice_pASTY_pform <- dplyr::bind_rows(pASTY_tables)

# Keep only rows whose Amino is not "A", then tabulate rows per dataset.
# NOTE(review): rows with a missing Amino come back as all-NA rows here.
amino_is_not_A <- AllRice_pASTY_pform$Amino != "A"
AllRice_pASTY_pform_Excluding_A <- AllRice_pASTY_pform[amino_is_not_A, ]

tab1(AllRice_pASTY_pform_Excluding_A$dataset)

# Pool the max-collapsed pSTY peptidoform tables and tabulate per dataset.
pSTY_tables <- list(
  PXD000923_pform, PXD002222_pform, PXD002756_pform, PXD004705_pform,
  PXD004939_pform, PXD005241_pform, PXD012764_pform, PXD019291_pform
)
AllRice_pSTY_pform <- dplyr::bind_rows(pSTY_tables)

tab1(AllRice_pSTY_pform$dataset)

# Cut every pASTY peptidoform table at FLR thresholds of 1%, 2.5% and 5%,
# pool the cuts across datasets, drop the "A" rows and tabulate the
# remaining rows per dataset.
pASTY_scored <- list(
  PXD000923A = PXD000923A_pformc, PXD002222A = PXD002222A_pformc,
  PXD002756A = PXD002756A_pformc, PXD004705A = PXD004705A_pformc,
  PXD004939A = PXD004939A_pformc, PXD005241A = PXD005241A_pformc,
  PXD012764A = PXD012764A_pformc, PXD019291A = PXD019291A_pformc
)

# Keep rows 1 .. k of `scored`, where k is the last row whose FLR_Adj_Score
# is <= `flr_cutoff` (rows above the cutoff that sit before row k are kept,
# as before). Returns zero rows when no row meets the cutoff; the previous
# `1:max(which(...))` form aborted in that case because max(integer(0)) is
# -Inf. Assumes the table is already ordered as returned by FLR_AdjTPP() --
# TODO confirm.
rows_through_flr <- function(scored, flr_cutoff) {
  passing <- which(scored$FLR_Adj_Score <= flr_cutoff)
  last_passing <- if (length(passing) > 0L) max(passing) else 0L
  scored[seq_len(last_passing), ]
}

# Apply one cutoff to every dataset; names become "<id>A_pform_<label>".
cut_all_at_flr <- function(flr_cutoff, label) {
  kept <- lapply(pASTY_scored, rows_through_flr, flr_cutoff = flr_cutoff)
  setNames(kept, paste0(names(kept), "_pform_", label))
}

pASTY_flr_01 <- cut_all_at_flr(0.01, "01")
pASTY_flr_02.5 <- cut_all_at_flr(0.025, "02.5")
pASTY_flr_05 <- cut_all_at_flr(0.05, "05")

# Publish the per-dataset cuts under their original variable names
# (PXD000923A_pform_01, PXD000923A_pform_02.5, PXD000923A_pform_05, ...).
invisible(
  list2env(c(pASTY_flr_01, pASTY_flr_02.5, pASTY_flr_05), envir = environment())
)

AllRice_pASTY_pform_01 <- dplyr::bind_rows(pASTY_flr_01)
AllRice_pASTY_pform_02.5 <- dplyr::bind_rows(pASTY_flr_02.5)
AllRice_pASTY_pform_05 <- dplyr::bind_rows(pASTY_flr_05)

AllRice_pASTY_pform_01_Excluding_A <-
  AllRice_pASTY_pform_01[AllRice_pASTY_pform_01$Amino != "A", ]

tab1(AllRice_pASTY_pform_01_Excluding_A$dataset)

AllRice_pASTY_pform_02.5_Excluding_A <-
  AllRice_pASTY_pform_02.5[AllRice_pASTY_pform_02.5$Amino != "A", ]

tab1(AllRice_pASTY_pform_02.5_Excluding_A$dataset)

AllRice_pASTY_pform_05_Excluding_A <-
  AllRice_pASTY_pform_05[AllRice_pASTY_pform_05$Amino != "A", ]

tab1(AllRice_pASTY_pform_05_Excluding_A$dataset)


# Filter every max-collapsed pSTY table at PTM_final_prob >= 0.95, 0.975
# and 0.99, pool each cut across datasets and tabulate rows per dataset.
pSTY_collapsed <- list(
  PXD000923 = PXD000923_pform, PXD002222 = PXD002222_pform,
  PXD002756 = PXD002756_pform, PXD004705 = PXD004705_pform,
  PXD004939 = PXD004939_pform, PXD005241 = PXD005241_pform,
  PXD012764 = PXD012764_pform, PXD019291 = PXD019291_pform
)

# Rows of `pform` whose PTM_final_prob is at least `min_prob`.
keep_prob_at_least <- function(pform, min_prob) {
  confident <- pform$PTM_final_prob >= min_prob
  pform[confident, ]
}

# Apply one cutoff to every dataset; names become "<id>_pform_<label>".
filter_all_at_prob <- function(min_prob, label) {
  kept <- lapply(pSTY_collapsed, keep_prob_at_least, min_prob = min_prob)
  setNames(kept, paste0(names(kept), "_pform_", label))
}

pSTY_prob_95 <- filter_all_at_prob(0.95, "95")
pSTY_prob_97.5 <- filter_all_at_prob(0.975, "97.5")
pSTY_prob_99 <- filter_all_at_prob(0.99, "99")

# Publish the per-dataset cuts under their original variable names
# (PXD000923_pform_95, PXD000923_pform_97.5, PXD000923_pform_99, ...).
invisible(
  list2env(c(pSTY_prob_95, pSTY_prob_97.5, pSTY_prob_99), envir = environment())
)

AllRice_pSTY_pform_95 <- dplyr::bind_rows(pSTY_prob_95)
AllRice_pSTY_pform_97.5 <- dplyr::bind_rows(pSTY_prob_97.5)
AllRice_pSTY_pform_99 <- dplyr::bind_rows(pSTY_prob_99)

tab1(AllRice_pSTY_pform_95$dataset)

tab1(AllRice_pSTY_pform_97.5$dataset)

tab1(AllRice_pSTY_pform_99$dataset)

